library(data.table)
library(tidyverse)
## -- Attaching packages ------------------------ tidyverse 1.3.0 --
## v ggplot2 3.2.1     v purrr   0.3.3
## v tibble  2.1.3     v dplyr   0.8.4
## v tidyr   1.0.2     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.4.0
## -- Conflicts --------------------------- tidyverse_conflicts() --
## x dplyr::between()   masks data.table::between()
## x dplyr::filter()    masks stats::filter()
## x dplyr::first()     masks data.table::first()
## x dplyr::lag()       masks stats::lag()
## x dplyr::last()      masks data.table::last()
## x purrr::transpose() masks data.table::transpose()
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Read the full weather table as a plain data.frame, then keep only the
# Sydney rows and convert the columns used further down.
weather <- fread("weatherAUS.csv", data.table = FALSE)
weather.sydney <- weather %>%
  filter(Location == "Sydney") %>%
  mutate(
    Date = as.Date(Date),
    RainTomorrow = as.factor(RainTomorrow),
    # Missing RainTomorrow entries are recoded to the existing "No" level.
    RainTomorrow = replace(RainTomorrow, is.na(RainTomorrow), "No")
  )
# Histogram of Rainfall using bins of width 3.
ggplot(weather.sydney, aes(x = Rainfall)) +
  geom_histogram(binwidth = 3)
## Warning: Removed 6 rows containing non-finite values (stat_bin).

# Count the rows whose Rainfall is exactly 0 (rows with missing Rainfall are
# not counted) and print that count as a percentage of all Sydney rows.
norainy.day <- sum(weather.sydney$Rainfall == 0, na.rm = TRUE)
paste0(norainy.day / nrow(weather.sydney) * 100, "%")
## [1] "60.5933473179503%"
# Relative frequency of each RainTomorrow level.
prop.table(table(weather.sydney[["RainTomorrow"]]))
## 
##        No       Yes 
## 0.7407851 0.2592149
# MaxTemp over time as a line with a smoothed trend, rendered as an
# interactive plotly widget.
interactive_plot <- weather.sydney %>%
  ggplot(aes(Date, MaxTemp)) +
  geom_line() +
  geom_smooth()
ggplotly(interactive_plot)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
# Humidity3pm distribution for each RainTomorrow level.
ggplot(
  weather.sydney,
  aes(RainTomorrow, Humidity3pm, colour = RainTomorrow, fill = RainTomorrow)
) +
  geom_violin()
## Warning: Removed 13 rows containing non-finite values (stat_ydensity).

# Pressure3pm distribution for each RainTomorrow level.
ggplot(
  weather.sydney,
  aes(RainTomorrow, Pressure3pm, colour = RainTomorrow, fill = RainTomorrow)
) +
  geom_violin()
## Warning: Removed 19 rows containing non-finite values (stat_ydensity).

# Rainfall distribution for each RainTomorrow level.
ggplot(
  weather.sydney,
  aes(RainTomorrow, Rainfall, colour = RainTomorrow, fill = RainTomorrow)
) +
  geom_violin()
## Warning: Removed 6 rows containing non-finite values (stat_ydensity).

# Collect the measurement columns for a scatterplot matrix: every column
# whose name contains "Temp", "Speed", "Humidity" or "Pressure", plus
# Rainfall and RISK_MM.
# contains() matches a literal substring and selects nothing (without any
# error) when the pattern matches no column, so the wind-speed pattern has to
# be spelled "Speed".
# NOTE(review): assumes the wind-speed columns carry "Speed" in their names
# -- confirm against the CSV header.
num_data <- weather.sydney %>%
  select(
    contains("Temp"), Rainfall, contains("Speed"),
    contains("Humidity"), contains("Pressure"), RISK_MM
  )
pairs(num_data)

# Same-day Rainfall against Humidity3pm with a fitted linear trend line.
ggplot(weather.sydney, aes(Humidity3pm, Rainfall)) +
  geom_point() +
  geom_smooth(method = "lm")
## Warning: Removed 19 rows containing non-finite values (stat_smooth).
## Warning: Removed 19 rows containing missing values (geom_point).

# Pair each row with the Rainfall recorded on the following calendar day.
# The next row's Rainfall is taken only when that row is dated exactly one day
# later; a purely positional shift would silently pair non-consecutive days
# whenever the dates have gaps or the rows are not in date order. Rows without
# a next-day record (including the last row) get NA.
humid_Tomorrowrain <-
  weather.sydney %>%
  select(Date, Humidity3pm, RainToday, RainTomorrow, Rainfall) %>%
  mutate(
    Tomorrow.Rainfall = if_else(
      lead(Date) == Date + 1,
      lead(Rainfall),
      NA_real_
    )
  )

head(humid_Tomorrowrain)
##         Date Humidity3pm RainToday RainTomorrow Rainfall Tomorrow.Rainfall
## 1 2008-02-01          84       Yes          Yes     15.6               6.0
## 2 2008-02-02          73       Yes          Yes      6.0               6.6
## 3 2008-02-03          86       Yes          Yes      6.6              18.8
## 4 2008-02-04          90       Yes          Yes     18.8              77.4
## 5 2008-02-05          74       Yes          Yes     77.4               1.6
## 6 2008-02-06          62       Yes          Yes      1.6               6.2
# Tomorrow.Rainfall against Humidity3pm with a fitted linear trend line.
ggplot(humid_Tomorrowrain, aes(Humidity3pm, Tomorrow.Rainfall)) +
  geom_point() +
  geom_smooth(method = "lm")
## Warning: Removed 20 rows containing non-finite values (stat_smooth).
## Warning: Removed 20 rows containing missing values (geom_point).